Gradienten er
\(\begin{bmatrix} \frac{1}{n} \sum_i^n 2(a + bs_i - d_i) \\ \frac{1}{n} \sum_i^n 2(a + bs_i -d_i)s_i \end{bmatrix}\)
ab <- c(-20,3)
n <- 50
ms <- function(ab) {
ab[1] + ab[2] * cars$speed
}
f_i <- function(ab) {
(ms(ab) - cars$dist)^2
}
f <- function(ab) {
1/n * sum(f_i(ab))
}
d_f <- function(ab) {
grad(f,ab)
}
dd_f <- function(ab) {
hessian(f,ab)
}
backtrack <- function(a_bar = 3, rho = 0.3, c = 0.2, func = f, Dfunc = d_f, x_k = ab, c_2 = 0.5) {
a <- a_bar
itt <- 0
while ( (func(x_k + a * (-Dfunc(x_k)) ) ) > (func(x_k) + c * a * t(Dfunc(x_k)) %*% (-Dfunc(x_k))) ) {
# while (t(Dfunc(x_k + a * -Dfunc(x_k))) %*% -Dfunc(x_k) < c_2 * t(Dfunc(x_k)) %*% -Dfunc(x_k) ) {
itt <- itt + 1
a <- rho * a
# }
}
a
}
#params
c1 <- 0.001
c2 <- 0.9
a_lo <- 0.001
a_hi <- 0.08
a_0 <- 0.1
# Implementation of Algorithm 3.6
# (Zoom)
zoom <- function(x_k, a_lo, a_hi, c1, c2, func = f, g = d_f) {
f_k <- func(x_k)
g_k <- g(x_k)
p_k <- -g_k
k <- 0
k_max <- 1000 # Maximum number of iterations.
done <- FALSE
while(!done) {
k <- k + 1
phi_lo <- func(x_k + a_lo*p_k)
a_k <- 0.5*(a_lo + a_hi)
phi_k <- func(x_k + a_k*p_k)
dphi_k_0 <- g_k%*%p_k
l_k <- f_k + c1*a_k*dphi_k_0
if ((phi_k > l_k) | (phi_k >= phi_lo)) {
a_hi <- a_k
} else {
dphi_k <- p_k %*% g(x_k + a_k*p_k)
if (abs(dphi_k) <= -c2*dphi_k_0) {
return(a_k)
}
if (dphi_k*(a_hi - a_lo) >= 0) {
a_hi <- a_lo
}
a_lo <- a_k
}
done <- (k > k_max)
}
return(a_k)
}
alpha <- function(a_0, x_k, c1, c2, func = f, g = d_f) {
a_max <- 4*a_0 # Maximum step length. Can also be given as argument.
f_k <- func(x_k)
phi_k <- f_k
a_1 <- a_0
a0 <- 0
a_k <- a_1
a_k_old <- a0
k <- 0
k_max <- 1000 # Maximum number of iterations.
done <- FALSE
while(!done) {
k <- k + 1
f_k <- func(x_k)
g_k <- g(x_k)
p_k <- -g_k
phi_k_old <- phi_k
phi_k <- func(x_k + a_k*p_k)
dphi_k_0 <- g_k%*%p_k
l_k <- f_k + c1*a_k*dphi_k_0
if ((phi_k > l_k) || ((k > 1) && (phi_k >= phi_k_old))) {
return(zoom(x_k, a_k_old, a_k, c1, c2))
}
dphi_k <- p_k %*% g(x_k + a_k*p_k)
if (abs(dphi_k) <= -c2*dphi_k_0) {
return(a_k)
}
if (dphi_k >= 0) {
return(zoom(a_k, a_k_old, x_k, c1, c2))
}
a_k_old <- a_k
a_k <- rho*a_k + (1 - rho)*a_max # e.g. rho <- 0.5
done <- (k > k_max)
}
return(a_k)
}
#steepest descent
optimer_identitet_backtrack <- function(x_0 = ab) {
x_k <- x_0
a_k <- 2
itt <- 0
steps <- c()
plot(dist ~ speed, cars)
while (norm(t(d_f(x_k)), type = "2") > 1e-5) {
itt <- 1 + itt
p_k <- -d_f(x_k)
a_k <- backtrack(a_k, rho = 0.5, c = 0.001, f, d_f, x_k)
x_k <- x_k + a_k * t(p_k)
steps[itt] <- a_k
if (itt %% 500 == 0) {
abline(x_k)
}
if (itt == 100000) {
break
}
}
cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
# optimer_identitet_backtrack()
optimer_identitet_zoom <- function(x_0 = ab) {
x_k <- x_0
a_k <- 2
itt <- 0
steps <- c()
plot(dist ~ speed, cars)
while (norm(t(d_f(x_k)), type = "2") > 1e-5) {
itt <- 1 + itt
p_k <- -d_f(x_k)
a_k <- alpha(a_0, x_k, c1,c2)
x_k <- x_k + a_k * t(p_k)
steps[itt] <- a_k
if (itt %% 500 == 0) {
abline(x_k)
}
if (itt == 100000) {
break
}
}
cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
# optimer_identitet_zoom()
#newton
optimer_newton_backtrack <- function(x_0 = ab, output = TRUE) {
x_k <- x_0
a_k <- 2
itt <- 0
steps <- c()
x_plot <- c()
y_plot <- c()
plot(dist ~ speed, cars)
while (norm(t(d_f(x_k)), type = "2") > 1e-4) {
itt <- 1 + itt
p_k <- solve(dd_f(x_k), -d_f(x_k))
a_k <- backtrack(a_k, rho = 0.5, c = 0.01, f, d_f, x_k)
x_k <- x_k + a_k * t(p_k)
if (itt %% 50 == 0) {
abline(x_k)
x_plot[itt/50] <- x_k[1]
y_plot[itt/50] <- x_k[2]
}
steps[itt] <- a_k
if (itt == 100000) {
break
}
}
if (output == TRUE) {
cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
else
cbind(x_plot, y_plot)
}
xy_backtrack <- optimer_newton_backtrack(output = FALSE)
optimer_newton_backtrack(output = TRUE)
## x* = -17.5791 3.932409 f(x*) = 227.0704 iteration = 7956 steps = 0.001953125 0.001953125 0.001953125 0.001953125 0.001953125
#newton
optimer_newton_zoom <- function(x_0 = ab, output = TRUE) {
x_k <- x_0
a_k <- 2
itt <- 0
steps <- c()
x_plot <- c()
y_plot <- c()
plot(dist ~ speed, cars)
while (norm(t(d_f(x_k)), type = "2") > 1e-4) {
itt <- 1 + itt
p_k <- solve(dd_f(x_k), -d_f(x_k))
a_k <- alpha(a_0, x_k, c1,c2)
x_k <- x_k + a_k * t(p_k)
if (itt %% 50 == 0) {
abline(x_k)
x_plot[itt/50] <- x_k[1]
y_plot[itt/50] <- x_k[2]
}
steps[itt] <- a_k
if (itt == 100000) {
break
}
}
if (output == TRUE) {
cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
else
cbind(x_plot, y_plot)
}
xy_zoom <- optimer_newton_zoom(output = FALSE)
optimer_newton_zoom(output = TRUE)
## x* = -17.5791 3.932409 f(x*) = 227.0704 iteration = 4970 steps = 0.003125 0.003125 0.003125 0.003125 0.003125
benchmark_result <- microbenchmark(optimer_identitet_backtrack(), optimer_identitet_zoom(), optimer_newton_backtrack(), optimer_newton_zoom(), times = 1)
## x* = -17.5791 3.932409 f(x*) = 227.0704 iteration = 4970 steps = 0.003125 0.003125 0.003125 0.003125 0.003125
## x* = -17.5791 3.932409 f(x*) = 227.0704 iteration = 7956 steps = 0.001953125 0.001953125 0.001953125 0.001953125 0.001953125
## x* = -17.57914 3.932412 f(x*) = 227.0704 iteration = 26768 steps = 0.001953125 0.001953125 0.001953125 0.001953125 0.001953125
## x* = -17.57913 3.932411 f(x*) = 227.0704 iteration = 12869 steps = 0.003125 0.003125 0.003125 0.003125 0.003125
ms_plot <- function(x,y) {
x + y * cars$speed
}
f_i_plot <- function(x,y) {
(ms_plot(x,y) - cars$dist)^2
}
f_plot <- function(x,y) {
1/n * sum(f_i_plot(x,y))
}
fkts_vaerdier_backtrack <- apply(xy_backtrack, 1, f)
# x <- seq(-20,-17,length = 200)
# y <- seq(2.9,4.2, length = 200)
x <- xy_backtrack[,1]
y <- xy_backtrack[,2]
z <- outer(x, y, FUN = Vectorize("f_plot"))
plot_ly(x = x, y = y, z = ~z) %>% add_surface() %>%
add_trace(x = xy_backtrack[,1], y = xy_backtrack[,2], z = fkts_vaerdier_backtrack)
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
fkts_vaerdier_zoom <- apply(xy_zoom, 1, f)
# x <- seq(-20,-17,length = 200)
# y <- seq(2.9,4.2, length = 200)
x <- xy_zoom[,1]
y <- xy_zoom[,2]
z <- outer(x, y, FUN = Vectorize("f_plot"))
plot_ly(x = x, y = y, z = ~z) %>% add_surface() %>%
add_trace(x = xy_zoom[,1], y = xy_zoom[,2], z = fkts_vaerdier_zoom)
## No trace type specified:
## Based on info supplied, a 'scatter3d' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter3d
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
summary(lm(cars$dist ~ cars$speed))
##
## Call:
## lm(formula = cars$dist ~ cars$speed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.069 -9.525 -2.272 9.215 43.201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.5791 6.7584 -2.601 0.0123 *
## cars$speed 3.9324 0.4155 9.464 1.49e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared: 0.6511, Adjusted R-squared: 0.6438
## F-statistic: 89.57 on 1 and 48 DF, p-value: 1.49e-12
optim(ab, f, hessian = TRUE)
## $par
## [1] -17.583175 3.932699
##
## $value
## [1] 227.0704
##
## $counts
## function gradient
## 53 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
##
## $hessian
## [,1] [,2]
## [1,] 2.0 30.80
## [2,] 30.8 529.12
phi_lig_nul <- function(x_k) {
1/n * sum((x_k[2] * cars$speed + x_k[1] - cars$dist) /(d_f(x_k)[2] * cars$speed + d_f(x_k)[1]))
}
optimer_identitet_phi <- function(x_0 = ab) {
x_k <- x_0
a_k <- 2
itt <- 0
steps <- c()
plot(dist ~ speed, cars)
while (norm(t(d_f(x_k)), type = "2") > 1e-5) {
itt <- 1 + itt
p_k <- -d_f(x_k)
a_k <- phi_lig_nul(x_k)
x_k <- x_k + a_k * t(p_k)
steps[itt] <- a_k
if (itt %% 1 == 0) {
abline(x_k)
}
if (itt == 100000) {
break
}
}
cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
# optimer_identitet_phi()
optimer_newton_phi <- function(x_0 = ab) {
x_k <- x_0
a_k <- 2
itt <- 0
steps <- c()
x_plot <- c()
f_plot <- c()
plot(dist ~ speed, cars)
while (norm(t(d_f(x_k)), type = "2") > 1e-5) {
itt <- 1 + itt
p_k <- solve(dd_f(x_k), -d_f(x_k))
a_k <- phi_lig_nul(x_k)
x_k <- x_k + a_k * t(p_k)
if (itt %% 50 == 0) {
abline(x_k)
}
steps[itt] <- a_k
if (itt == 100000) {
break
}
}
cat("x* = ", x_k, "f(x*) =", f(x_k), "iteration =", itt, "steps = ", steps[1:5])
}
# optimer_newton_phi()